Omix_vignette_CV_MTG.RmdThe goal of Omix is to provide tools in R to build a complete analysis workflow for integrative analysis of data generated from multi-omics platform.
Generate a multi-omics object using
MultiAssayExperiment.
Quality control of single-omics data.
Formatting, normalisation, denoising of single-omics data.
Separate single-omics analyses.
Integration of multi-omics data for combined analysis.
Publication quality plots and interactive analysis reports based of shinyApp.
Currently, Omix supports the integration of bulk transcriptomics and bulk proteomics.
The Omix pipeline requires the following input:
rawdata_rna: A data-frame containing raw RNA count
with rownames as gene and colnames as samples.
rawdata_protein: A data-frame containing protein
abundance with rownames as proteins and colnames as samples.
map_rna: A data-frame of two columns named
primary and colname where primary should
contain unique sample name with a link to sample metadata and colname is
the column names of the rawdata_rna data-frame.
map_protein: A data-frame of two columns named
primary and colname where primary should
contain unique sample name with a link to sample metadata and colname is
the column names of the rawdata_protein
data-frame.
metadata_rna: A data-frame containing
rna assay specific metadata where rownames are same as the
colnames of rawdata_rna data.frame.
metadata_protein: A data-frame containing
protein assay specific metadata where rownames are same as
the colnames of rawdata_protein data.frame.
individual_metadata: A data-frame containing
individual level metadata for both omics assays.
Call required packages for vignette:
library(Omix)##
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
##
##
First we must load the data from Omix for the vignette:
outputDir <- tempdir()
rawdata_rna_fp <- file.path(outputDir, "raw_counts_MTG_CV.rds")
rawdata_protein_fp <- file.path(outputDir, "raw_proteomics_MTG_CV.rds")
map_rna_fp <- file.path(outputDir, "map_RNA_MTG_CV.rds")
map_prot_fp <- file.path(outputDir, "map_prot_MTG_CV.rds")
metadata_rna_fp <- file.path(outputDir, "metadata_rna_MTG_CV.rds")
metadata_prot_fp <- file.path(outputDir, "metadata_prot_MTG_CV.rds")
individual_metadata_fp <-file.path(outputDir, "metadata_map_MTG_CV_new.rds")
ctd_fp <-file.path(outputDir, "ctd.rds")
ensembl_fp <- file.path(outputDir, "ensembl.rds")
GO_fp <- file.path(outputDir, "GO_Biological_Process_2021.txt")
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/raw_counts_MTG_CV.rds",destfile=rawdata_rna_fp,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/raw_proteomics_MTG_CV.rds",destfile=rawdata_protein_fp,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/map_RNA_MTG_CV.rds",destfile=map_rna_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/map_prot_MTG_CV.rds",destfile=map_prot_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_rna_MTG_CV.rds",destfile=metadata_rna_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_prot_MTG_CV.rds",destfile=metadata_prot_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_map_MTG_CV_new.rds",destfile=individual_metadata_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/GO_Biological_Process_2021.txt",destfile=GO_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/ctd-2.rds",destfile=ctd_fp ,headers = c(Authorization = paste("token", "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")))
download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/ensembl_mappings_human.tsv",destfile=ensembl_fp ,headers = c(Authorization = paste("token", "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")))
rawdata_rna <- as.data.frame(readRDS(paste0(outputDir,'/raw_counts_MTG_CV.rds')))
rawdata_protein <- as.data.frame(readRDS(paste0(outputDir,'/raw_proteomics_MTG_CV.rds')))
map_rna <- as.data.frame(readRDS(paste0(outputDir,'/map_RNA_MTG_CV.rds')))
map_prot <- as.data.frame(readRDS(paste0(outputDir,'/map_prot_MTG_CV.rds')))
metadata_rna<- as.data.frame(readRDS(paste0(outputDir,'/metadata_rna_MTG_CV.rds')))
metadata_prot <- as.data.frame(readRDS(paste0(outputDir,'/metadata_prot_MTG_CV.rds')))
individual_metadata <- as.data.frame(readRDS(paste0(outputDir,'/metadata_map_MTG_CV_new.rds')))
ctd <- readRDS(paste0(outputDir,'/ctd.rds'))
ensembl <-read.delim(file=paste0(outputDir,'/ensembl.rds'), sep = '\t', header = TRUE)
rna_qc_data_matrix <- NULLSanity check
## [1] TRUE
## [1] TRUE
rawdata_rna is a data-frame of raw counts, with features
as rows and samples as columns
print(rawdata_rna[1:5,1:5])## IGF117756 IGF117759 IGF117764 IGF117772 IGF117778
## ENSG00000223972 1 1 1 0 0
## ENSG00000227232 40 30 18 15 35
## ENSG00000278267 1 0 3 4 3
## ENSG00000243485 0 0 0 0 0
## ENSG00000284332 0 0 0 0 0
rawdata_protein is a data-frame of raw protein
abundances, with features as rows and samples as columns
print(rawdata_protein[15:20,15:20])## 20110137_MTG_P1WG4_001 20110272_MTG_P1WG7_001
## TUBAL3 7158.6100 9201.4900
## FBLL1 67.4334 60.1956
## CASTOR2 17.3159 26.2499
## UNC119;UNC119B 20.3847 12.3418
## YJEFN3 NA 23.4309
## ARHGAP32 11.9989 17.0831
## 20129990_MTG_P1WH4_001 A277s12_MTG_P3WD1_001
## TUBAL3 7294.23000 8982.00000
## FBLL1 85.75520 62.94880
## CASTOR2 15.63710 19.52160
## UNC119;UNC119B 19.64770 8.60717
## YJEFN3 NA 25.40000
## ARHGAP32 5.57716 9.02670
## A272s15_MTG_P3WC8_001 A297s16_MTG_P3WD7_001
## TUBAL3 10871.4000 8487.21000
## FBLL1 46.9068 57.40500
## CASTOR2 NA 22.37250
## UNC119;UNC119B NA 17.10210
## YJEFN3 19.7786 16.63510
## ARHGAP32 NA 7.95878
Maps are data frame containing two columns: primary and
colname. The primary column should be the
individual ID the individual metadata, and the colname the
matched sample names from the raw matrices columns. If sample and
individual ids are the same, maps aren’t needed (primary and colnames
are the same).
## primary colname
## 1 PDC016-MTG PDC016_MTG_P3WH10_001
## 2 PDC026-MTG PDC026_MTG_P4WA3_001
## 3 A019/12-MTG A019s12_MTG_P2WD10_001
## 4 20130400-MTG 20130400_MTG_P1WH9_001
## 5 A127/11-MTG A127s11_MTG_P2WH3_001
## 6 A053/11-MTG A053s11_MTG_P2WF5_001
## primary colname
## 1 A115/17-SSC IGF117745
## 2 19950030-SSC IGF117746
## 3 A210/05-MTG IGF117747
## 4 20100742-SSC IGF117748
## 5 19920001-SSC IGF117749
## 6 A138/12-MTG IGF117750
Technical metadata are data-frames contain the column
colname that should match the sample names in the raw
matrices, and any additional columns related to technical artefacts like
batches
## sample_id sample_name RIN Amyloid_Beta pTau
## IGF117756 19930222-MTG IGF117756 2.7 2.027670 6.98931000
## IGF117759 20129990-MTG IGF117759 2.0 3.262270 2.55134000
## IGF117764 A127/11-MTG IGF117764 4.6 0.147259 0.00247153
## IGF117772 20100742-MTG IGF117772 7.5 NA NA
## IGF117778 20110137-MTG IGF117778 4.8 3.828680 2.77941000
## IGF117784 19930148-MTG IGF117784 1.7 5.015050 2.94966000
## pct_pTau_Positive PHF1
## IGF117756 5.25975000 2.420650000
## IGF117759 0.37497600 1.564080000
## IGF117764 0.00876194 0.000519246
## IGF117772 NA NA
## IGF117778 2.56623000 NA
## IGF117784 0.71418700 2.409610000
## sample_id sample_name batch
## PDC016_MTG_P3WH10_001 PDC016-MTG PDC016_MTG_P3WH10_001 P3
## PDC026_MTG_P4WA3_001 PDC026-MTG PDC026_MTG_P4WA3_001 P4
## A019s12_MTG_P2WD10_001 A019/12-MTG A019s12_MTG_P2WD10_001 P2
## 20130400_MTG_P1WH9_001 20130400-MTG 20130400_MTG_P1WH9_001 P1
## A127s11_MTG_P2WH3_001 A127/11-MTG A127s11_MTG_P2WH3_001 P2
## A053s11_MTG_P2WF5_001 A053/11-MTG A053s11_MTG_P2WF5_001 P2
individual_metadata should contain individual level
metadata, where one column matches the primary column in
maps.
## case_id brain_region age trem2_all BBN_ID sex diagnosis apoe_final Braak
## 1 PDC016 MTG 91 CV BBN_10099 F Control E3/E3 2
## 2 PDC026 MTG 80 CV BBN_10109 F Control E2/E3 2
## 3 A019/12 MTG 92 CV BBN_10611 F AD E3/E3 3
## 4 20130400 MTG 65 CV BBN_14801 F Control E3/E3 1
## 5 A127/11 MTG 73 CV BBN_16213 M Control E3/E3 0
## 6 A053/11 MTG 77 CV BBN_18399 M Control E3/E3 0
## PMD sample_id sample_name amyloid pTau PHF1 AD
## 1 22 PDC016-MTG PDC016_MTG_P3WH10_001 0.5852850 0.00649899 0.001686760 0
## 2 23 PDC026-MTG PDC026_MTG_P4WA3_001 0.7826840 0.92852500 0.678769000 0
## 3 30 A019/12-MTG A019s12_MTG_P2WD10_001 0.3078820 0.01312750 0.017976000 1
## 4 47 20130400-MTG 20130400_MTG_P1WH9_001 NA NA NA 0
## 5 23 A127/11-MTG A127s11_MTG_P2WH3_001 0.1472590 0.00247153 0.000519246 0
## 6 11 A053/11-MTG A053s11_MTG_P2WF5_001 0.0455756 0.00222531 0.010463800 0
## MTG SSC
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
To run Omix, we first need to generate a multi-omics object The package currently supports transcriptomics and proteomics bulk data only.
# Convert metadata type from character to factor
individual_metadata$diagnosis <- factor(individual_metadata$diagnosis,
levels = c("Control", "AD"))
individual_metadata$sex <- factor(individual_metadata$sex)
#Generate multiassay object
multiomics_object_MTG=generate_multiassay(rawdata_rna =rawdata_rna,
rawdata_protein = rawdata_protein,
individual_to_sample=FALSE,
map_rna = map_rna,
map_protein = map_prot,
metadata_rna = metadata_rna,
metadata_protein = metadata_prot,
individual_metadata = individual_metadata,
map_by_column = 'sample_id',
rna_qc_data=FALSE,
rna_qc_data_matrix=NULL,
organism='human')## ✔ Ensembl ID conversion to gene symbol
## ✔ Retrieval of gene biotype
## harmonizing input:
## removing 58 sampleMap rows with 'colname' not in colnames of experiments
## ✔ RNA raw data loaded
## class: SummarizedExperiment
## dim: 58884 29
## metadata(1): metadata
## assays(1): rna_raw
## rownames(58884): ENSG00000223972 ENSG00000227232 ... ENSG00000277475
## ENSG00000268674
## rowData names(3): ensembl_gene_id gene_name gene_biotype
## colnames(29): IGF117756 IGF117759 ... IGF117836 IGF117837
## colData names(7): sample_id sample_name ... pct_pTau_Positive PHF1
## ✔ Protein raw data loaded
## class: SummarizedExperiment
## dim: 3228 39
## metadata(1): metadata
## assays(1): protein_raw
## rownames(3228):
## IGKV2-28;IGKV2-29;IGKV2-30;IGKV2-40;IGKV2D-26;IGKV2D-28;IGKV2D-29;IGKV2D-30;IGKV2D-40
## IGKV3-11;IGKV3D-11 ... FAM169A SEC23IP
## rowData names(1): gene_name
## colnames(39): PDC016_MTG_P3WH10_001 PDC026_MTG_P4WA3_001 ...
## 20196919_MTG_P2WD1_001 20196928_MTG_P2WD4_001
## colData names(3): sample_id sample_name batch
## ✔ MultiAssayExperiment object generated!
The MultiAssayExperiment object was succesfully created. The
following steps will process and perform QC on each omic layers of the
multiassay object.
multiomics_object_MTG=process_rna(multiassay=multiomics_object_MTG,
transformation='vst',
protein_coding=TRUE,
min_count = 10,
min_sample = 0.5,
dependent = "diagnosis",
levels = c("Control","AD"),
covariates=c('age','sex','PMD'),
filter=TRUE,
batch_correction=TRUE,
batch=NULL,
remove_sample_outliers= FALSE)## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## ✔ NORMALISATION & TRANSFORMATION
## ✔ GENE FILTERING
## ✔ Keeping only protein coding genes
## ✔ 39155 / 58884 non protein coding genes were dropped
## ✔ 19729 protein coding genes kept for analysis
## ✔ QC parameters selected require genes to have at least 50 % of samples with counts of 10 or more
## ✔ 4884 / 19729 genes were dropped
## ✔ 14845 genes kept for analysis
## ✔ VST TRANSFORMATION
## ✔ BATCH CORRECTION
## ✔ Using Limma on to remove technical artefacts, and age as biological confoundersUsing Limma on to remove technical artefacts, and sex as biological confoundersUsing Limma on to remove technical artefacts, and PMD as biological confounders
## ✔ Transcriptomics data processed!
## ✔ Processing parameters saved in metadata
multiomics_object_MTG=process_protein(
multiassay=multiomics_object_MTG,
filter=TRUE,
min_sample = 0.5,
dependent = "diagnosis",
levels = c("Control","AD"),
imputation = 'minimum_value',
remove_feature_outliers= FALSE,
batch_correction= FALSE,
batch="batch",
correction_method="median_centering",
remove_sample_outliers=FALSE,
denoise=TRUE,
covariates=c('PMD','sex','age'))## ✔ SCALING NORMALIZATION
## ✔ FILTERING
## ✔ 322 / 3228 proteins filtered
## ✔ 2906 proteins kept for analysis
## ✔ IMPUTATION
## ✔ Imputation of remaining missing values based on
## 50% of minimum level of abundance for each protein
## ✔ DENOISING BIOLOGICAL COVARIATES
## ✔ Using Limma on PMD as biological confoundersUsing Limma on sex as biological confoundersUsing Limma on age as biological confounders
The processing parameter are stored in the object
Now we run a differential expression analysis between groups of
interest, where the first level, here control is the reference. The
function requires to run process_rna first, with covariates
of interest. These are adjusted for in the DE.
multiomics_object_MTG=rna_DE_analysis(multiassay=multiomics_object_MTG,
dependent='diagnosis',
levels=c('Control','AD'),
filter_protein_coding = TRUE,
log2FoldChange = 0.2)## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
Differential expression results for transcripts are saved in the
multiomics_object_MTG@metadata$DEG slot
DT::datatable(multiomics_object_MTG@metadata$DEG$ADvsControl,
rownames = FALSE,
escape = FALSE,
options = list(pageLength = 20,
scrollX=T,
autoWidth = TRUE,
columnDefs = list(list(width = '500px', targets = 1)),
dom = 'Blfrtip'))Volcano plot of differentially expressed genes is saved in the
multiomics_object_MTG@metadata$DEG$plot slot
volcano_interactive(multiomics_object_MTG@metadata[["DEG"]]$ADvsControl, padj=0.05)Now we run a differential expression analysis between groups of
interest, where the first level, here control is the reference. If the
data is already denoised for covariates of interest
process_rna, set covariates as `NULL``
multiomics_object_MTG=protein_DE_analysis(multiomics_object_MTG,
slot="protein_processed",
dependent='diagnosis',
covariates=NULL,
levels=c('Control','AD'),
log2FoldChange = 0.2)Differential expression results for proteins are saved in the
multiomics_object_MTG@metadata$DEP slot
DT::datatable(multiomics_object_MTG@metadata$DEP$ADvsControl,
rownames = FALSE,
escape = FALSE,
options = list(pageLength = 20,
scrollX=T,
autoWidth = TRUE,
columnDefs = list(list(width = '500px', targets = 1)),
dom = 'Blfrtip'))Volcano plot of differentially abundant proteins is saved in the
multiomics_object_MTG@metadata$DEP$plot slot
volcano_interactive(multiomics_object_MTG@metadata[["DEP"]]$ADvsControl, padj=0.05)Since the magnitude of changes are not directly comparable between platforms, but their directionality are, we assessed whether significant differential features overlapped between the two platforms and whether their direction of effects were concordant or discordant.
DE_comparison=single_omic_comparisons(multiassay=multiomics_object_MTG,
slot='ADvsControl',
threshold = 0.05,
pvalue = 'pval',
filtering_options=NULL)
volcano_interactive_comparison(DE_comparison$dataframe)
corr= cor(DE_comparison$dataframe$log2FoldChange.x,DE_comparison$dataframe$log2FoldChange.y)We found a r=0.2230505 correlation between pairwise log2FoldChanges (p-value <0.05) indicating a moderate positive correlation between the mRNA and protein level changes, which is reinforced by the existence of discordant pairs.
DT::datatable(DE_comparison$dataframe,
rownames = FALSE,
escape = FALSE,
options = list(pageLength = 20,
scrollX=T,
autoWidth = TRUE,
columnDefs = list(list(width = '500px', targets = 1)),
dom = 'Blfrtip'))Overall, these single platform analyses highlighted obvious differences between transcriptomics and proteomics and should be further studied to differentiate technical versus biological effects. Overall, results highlight that:
Protein with a positive Log2FC but negative mRNA regulation may point to those accumulating in the brain as AD progresses
DE_comparison$dataframe$gene_name[which(DE_comparison$dataframe$direction=='Discordant'&DE_comparison$dataframe$log2FoldChange.y>0.2)]## [1] "ACAN" "ACAT2" "ACOT8" "ACSL6" "ADAM10" "ADAR"
## [7] "AKR1A1" "AKR7A2" "ALDH18A1" "ALDH9A1" "ALMS1" "ANK1"
## [13] "ANP32A" "AP3M1" "APOE" "APP" "APRT" "ARCN1"
## [19] "ARF1" "ARF4" "ARHGDIA" "ARL1" "ARL3" "ARMC10"
## [25] "ARMC8" "ASS1" "ATP1B2" "ATP2B3" "ATP2B4" "ATP8A1"
## [31] "ATP9A" "BAG5" "BANF1" "BCL2L13" "BLOC1S2" "BLVRB"
## [37] "C1QA" "C1QC" "CACNA2D2" "CALR" "CAMK2G" "CAMSAP2"
## [43] "CAPNS1" "CBR1" "CBR3" "CBX3" "CCDC43" "CCDC91"
## [49] "CCS" "CD200" "CDK14" "CHMP2B" "CHMP7" "CHORDC1"
## [55] "CISD2" "CKB" "CLIC4" "CNN3" "CNRIP1" "COMMD10"
## [61] "COMMD2" "COPB1" "COPG1" "COPS6" "CPE" "CPNE3"
## [67] "CPNE6" "CSNK1A1" "CSTB" "CTSB" "DDOST" "DHRS7"
## [73] "DHX15" "DHX9" "DMD" "DNPEP" "DRG1" "DYNC1I2"
## [79] "EIF1" "EIF2B3" "EIF2B4" "EIF3D" "EIF3E" "EIF4A3"
## [85] "EIF4G3" "EMD" "ENO1" "EPM2AIP1" "ESD" "EXOC5"
## [91] "F3" "FBXL16" "FDPS" "FHL1" "FIBP" "FKBP4"
## [97] "FN3KRP" "FNTA" "FSD1L" "GABBR1" "GAK" "GBA"
## [103] "GCA" "GGH" "GLO1" "GNAQ" "GNAS" "GPM6A"
## [109] "GRIN1" "GSPT1" "GSPT2" "GSTM5" "GYS1" "HACD3"
## [115] "HARS2" "HBB" "HBS1L" "HMGB1" "HNRNPK" "HOOK3"
## [121] "HSDL1" "HSPB1" "HSPB6" "IFT27" "INPP1" "INPP5A"
## [127] "IPO9" "ISOC1" "ITPR1" "KCNA2" "KCNMA1" "LAMTOR2"
## [133] "LCP1" "LGALS3" "LIMS1" "LIN7C" "LMAN1" "LSM3"
## [139] "LTA4H" "LUC7L2" "LUZP1" "LYSMD2" "MAL2" "MAOB"
## [145] "MAPK1" "MAPT" "MARCKS" "MAST1" "MPC2" "MRPS27"
## [151] "MTCH1" "MTCH2" "MTX3" "NAMPT" "NANS" "NDRG4"
## [157] "NEFH" "NF1" "NLGN2" "NOP56" "NPTX1" "NQO1"
## [163] "NT5C1A" "NUDCD1" "NUDT10" "OTUD6B" "PAFAH1B1" "PAFAH1B2"
## [169] "PCYOX1" "PELO" "PFN1" "PFN2" "PGAM1" "PGD"
## [175] "PHPT1" "PIK3R4" "PITRM1" "PLAA" "PLCD1" "PLCXD3"
## [181] "PLD3" "PPIA" "PPIG" "PPIL1" "PPT1" "PRAF2"
## [187] "PREP" "PREPL" "PRKAG2" "PRNP" "PRPSAP1" "PRUNE1"
## [193] "PSMC4" "PSMD10" "PSME2" "PTPRE" "PYGB" "RAB39B"
## [199] "RALYL" "RAN" "RANBP9" "RANGAP1" "RAPGEF6" "RBBP9"
## [205] "RBP1" "RCN1" "REPS1" "RMDN3" "RNPS1" "RPN1"
## [211] "RPN2" "RSU1" "RYR2" "S100A1" "S100A16" "SACM1L"
## [217] "SAE1" "SART3" "SCAMP1" "SCAMP5" "SCARB2" "SEC13"
## [223] "SEC22B" "SEC24C" "SERPINH1" "SETD7" "SIRT3" "SKP1"
## [229] "SLC1A3" "SLC2A3" "SLC3A2" "SLC4A1" "SLC4A4" "SLC7A14"
## [235] "SLC8A1" "SMS" "SNRNP200" "SNRNP40" "SNRPA1" "SNTB2"
## [241] "SON" "SQSTM1" "SRI" "SRP54" "SRP72" "SRSF1"
## [247] "SRSF2" "SRSF7" "SSB" "STAM2" "SURF4" "SV2B"
## [253] "SYNPR" "SYP" "SYT5" "TBCB" "TMA7" "TMEM11"
## [259] "TMEM163" "TMEM30A" "TMEM65" "TMX3" "TOP1" "TPP1"
## [265] "TRA2A" "TRAPPC10" "TSN" "TSNAX" "TSR2" "TXNRD1"
## [271] "UBA1" "UBAC1" "UBLCP1" "UFC1" "UFL1" "VCL"
## [277] "VPS18" "VPS25" "VPS29" "VTN" "WDR1" "XPO7"
background=get_background(multiomics_object_MTG)
scores_group=DE_comparison$dataframe[,c('gene_name','padj.x','padj.y')]
scores_group=scores_group[!duplicated(scores_group$gene_name), ]
rownames(scores_group)=scores_group$gene_name
scores_group=scores_group[,2:3]
colnames(scores_group)=c('Transcriptomics','Proteomics')
scores_group=as.matrix(scores_group)
ActivePathways=ActivePathways::ActivePathways(scores_group,
gmt=GO_fp,
background = background,
cutoff=0.5)## 53 terms were removed from gmt because they did not make the geneset.filter
By default, the vertical integration is performed on processed slots.
DIABLO is a supervised multi-omics integration model
that requires the input of a dependent variable. The function will
perform tuning on the defined range, and number of components
chosen.
multiomics_object_MTG=vertical_integration(multiomics_object_MTG,
slots = c(
"rna_processed",
"protein_processed"
),
integration='DIABLO',
intersect_genes=FALSE,
ID_type='gene_name',
dependent='diagnosis',
levels= c("Control", "AD"),
design="cor",
ncomp=2,
range=list(mRNA=seq(10,100,by=10),
proteins=seq(10,100,by=10)))## harmonizing input:
## removing 68 sampleMap rows not in names(experiments)
##
## ── MODEL TUNING ──
##
## Design matrix has changed to include Y; each block will be
## linked to Y.
##
## You have provided a sequence of keepX of length: 10 for block mRNA and 10 for block proteins.
## This results in 100 models being fitted for each component and each nrepeat, this may take some time to run, be patient!
##
## You can look into the 'BPPARAM' argument to speed up computation time.
##
## tuning component 1
##
|
| | 0%
|
|============== | 20%
|
|============================ | 40%
|
|========================================== | 60%
|
|======================================================== | 80%
|
|======================================================================| 100%
## tuning component 2
##
|
| | 0%
|
|============== | 20%
|
|============================ | 40%
|
|========================================== | 60%
|
|======================================================== | 80%
|
|======================================================================| 100%
## Design matrix has changed to include Y; each block will be
## linked to Y.
## ✔ VERTICAL INTEGRATION WITH DIABLO
The user may want to perform the integrative downstream analyses
step-by-step, or to run integrative_results_supervised,
which will create a results object containing, multi-omics signatures,
network visualisation, cell type and functional enrichment, as well as
comparison of signatures against the OpenTargets knowledge base.
integrative_results_MTG=integrative_results_supervised(multiomics_object_MTG,
integration = "DIABLO",
component = 1,
correlation_threshold=0.4,
enrichment_method='enrichr',
disease_id='MONDO_0004975',
simplify=FALSE,
compare_communities=FALSE)

## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2021... Done.
## Querying GO_Cellular_Component_2021... Done.
## Querying GO_Biological_Process_2021... Done.
## Querying WikiPathways_2021_Human... Done.
## Querying Reactome_2016... Done.
## Querying KEGG_2021_Human... Done.
## Querying MSigDB_Hallmark_2020... Done.
## Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2021... Done.
## Querying GO_Cellular_Component_2021... Done.
## Querying GO_Biological_Process_2021... Done.
## Querying WikiPathways_2021_Human... Done.
## Querying Reactome_2016... Done.
## Querying KEGG_2021_Human... Done.
## Querying MSigDB_Hallmark_2020... Done.
## Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2021... Done.
## Querying GO_Cellular_Component_2021... Done.
## Querying GO_Biological_Process_2021... Done.
## Querying WikiPathways_2021_Human... Done.
## Querying Reactome_2016... Done.
## Querying KEGG_2021_Human... Done.
## Querying MSigDB_Hallmark_2020... Done.
## Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2021... Done.
## Querying GO_Cellular_Component_2021... Done.
## Querying GO_Biological_Process_2021... Done.
## Querying WikiPathways_2021_Human... Done.
## Querying Reactome_2016... Done.
## Querying KEGG_2021_Human... Done.
## Querying MSigDB_Hallmark_2020... Done.
## Querying BioCarta_2016... Done.
## Parsing results... Done.
integrative_results_MTG$detrimental$multiomics_signature## $rna
## [1] "H1F0" "CARD11" "PALM3" "XPC" "FRAT1" "DRC1" "SLC22A1"
## [8] "NRIP2" "NRROS" "IFI27L1"
##
## $protein
## [1] "SLC7A14" "ISOC1" "EEF1E1" "DDX39B" "MAPT" "TPP1" "DARS1"
## [8] "EIF4A2"
interactive_network(integrative_results_MTG$detrimental$network$graph, communities = FALSE)